# Script setup: open a MongoDB connection and initialise the module-level
# state used by the collection-filtering loop further down.
from pymongo import MongoClient
# Only the port is given, so pymongo's default host is used.
client = MongoClient(port=27017)
# NOTE(review): np, pkl and tokenize are not referenced in the visible part
# of this script -- confirm they are needed elsewhere before removing.
import numpy as np
import pickle as pkl
from tokenizer import tokenize
# Handle to the 'tiktok' database; each of its collections is scanned below.
db=client['tiktok']
# Names of the collections that pass the filter; filled in by the loop below.
hts=[]
# NOTE(review): not used in the visible part of this script -- presumably a
# sequence length consumed elsewhere; confirm.
sequence_len = 20


# A collection is kept only when it holds MORE than this many documents that
# carry every required feature.
MIN_COMPLETE_DOCS = 1000

# Keys that must be present in obj['video_feature']['editing'].
# NOTE(review): 'avg_scences' is reproduced exactly as written in the original
# check -- confirm the spelling matches the stored field name.
REQUIRED_EDITING_KEYS = ('var_sb', 'avg_sticker_length', 'avg_scences')


def _has_all_features(obj):
    """Return True when a document carries every feature this script requires.

    A document qualifies when its image embedding, text and YAMNet audio
    entries are non-empty and its editing sub-document contains every key in
    ``REQUIRED_EDITING_KEYS``.

    Args:
        obj: A document as returned by the collection cursor.

    Returns:
        bool: True if all checks pass, False otherwise.

    Raises:
        KeyError: If one of the nested fields being inspected is missing.
            Lookups happen in the same order as the original inline check, so
            the editing sub-document is only touched once the three
            non-empty checks have passed.
    """
    video = obj['video_feature']
    if not (len(video['img_embed']) > 0
            and len(obj['text_feature']['text']) > 0
            and len(video['audio']['yamnet']) > 0):
        return False
    editing = video['editing']
    return all(key in editing for key in REQUIRED_EDITING_KEYS)


for col in db.list_collection_names():
    count = 0
    for obj in db[col].find():
        if _has_all_features(obj):
            count += 1
            if count > MIN_COMPLETE_DOCS:
                # Only "more than the threshold" matters, so stop reading the
                # cursor as soon as it is crossed instead of scanning the
                # rest of the collection.
                hts.append(col)
                break
print(hts)